# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
import urlparse
from cars_forum.items import CarsForumItem
import datetime
from scrapy.loader.processors import MapCompose, Join
import re

class BitautoSpider(scrapy.Spider):
    """Spider for the ask.bitauto.com Q&A forum.

    Crawls the question list pages (``/browse/<category>/pN``), follows every
    question detail page, and emits one :class:`CarsForumItem` per reply —
    or a single placeholder item when a question has no replies.
    """

    name = "bitauto"
    allowed_domains = ["bitauto.com"]
    # A concrete list, not a generator expression: a generator is exhausted
    # after a single iteration, so any second pass over start_urls (middleware
    # checks, resumption, debugging) would silently see it empty.
    start_urls = [
        'http://ask.bitauto.com/browse/' + str(i) + '/' for i in range(1, 4939)
    ]

    # Shared field-cleaning pipeline (strip whitespace, titlecase), built once
    # instead of re-creating the same MapCompose for every field assignment.
    clean = MapCompose(unicode.strip, unicode.title)

    def parse(self, response):
        """Parse a question list page: follow the next list page and every
        question detail link on the current one.

        :param response: list-page response
        :return: generator of Requests
        """
        # Next list page: bump an existing /pN suffix, or append the first one.
        # NOTE(review): pagination is followed unconditionally with no upper
        # bound — Scrapy's 404/dupe handling is what eventually stops it.
        current_page = response.meta.get('current_page') or 1
        next_page = current_page + 1
        if re.search(r'/p\d+', response.url):
            url = re.sub(r'/p\d+', '/p' + str(next_page), response.url)
        elif response.url.endswith('/'):
            url = response.url + 'p' + str(next_page)
        else:
            url = response.url + '/p' + str(next_page)
        yield Request(url, meta={"current_page": next_page}, callback=self.parse)

        # Question detail pages linked from the list container.
        item_selector = response.xpath('//*[@id="data_box0_0"]//@href')
        for url in item_selector.extract():
            yield Request(urlparse.urljoin(response.url, url), callback=self.parse_page)

    def parse_page(self, response):
        """Parse a question detail page.

        Follows the reply pagination and yields one item per reply block, or a
        single item with placeholder reply fields when there are no replies.

        :param response: detail-page response
        :return: generator of Requests and CarsForumItems
        """
        # BUG FIX: these next-page requests previously carried no callback, so
        # Scrapy dispatched them to self.parse (the LIST-page parser), which
        # mangled the URL with /pN and never extracted the replies. Reply
        # pagination must come back to parse_page itself.
        next_selector = response.xpath('//a[contains(@class, "next_on")]//@href')
        for url in next_selector.extract():
            yield Request(urlparse.urljoin(response.url, url), callback=self.parse_page)

        # Category breadcrumb links, positional: brand / car type / main / sub.
        brand = response.xpath('//div[contains(@class, "user-box")]//a[contains(@href, "/browse")][1]//text()').extract()
        car_type = response.xpath('//div[contains(@class, "user-box")]//a[contains(@href, "/browse")][2]//text()').extract()
        main_type = response.xpath('//div[contains(@class, "user-box")]//a[contains(@href, "/browse")][3]//text()').extract()
        sub_type = response.xpath('//div[contains(@class, "user-box")]//a[contains(@href, "/browse")][4]//text()').extract()

        title = response.xpath('//*[@id="form1"]//h1//text()').extract()
        question = response.xpath('//*[@id="form1"]//div[contains(@class, "ask-con")]//text()').extract()

        # Only record WHETHER the question body contains an image.
        if response.xpath('//*[@id="form1"]//div[contains(@class, "ask-con")]//img'):
            question_img = [u'有']
        else:
            question_img = [u'无']

        # Topic id: first run of digits in the URL; u"无" when none is found.
        searcher = re.search(r'\d+', response.url)
        topic_id = searcher.group(0) if searcher else u"无"

        kwargs = dict(
            brand=brand,
            car_type=car_type,
            main_type=main_type,
            sub_type=sub_type,
            title=title,
            question=question,
            question_img=question_img,
            topic_id=topic_id
        )

        replay_selector = response.xpath('//div[contains(@class, "hd_txt_box")]')

        if replay_selector:
            url = response.url
            for replay in replay_selector:
                yield self.parse_replay(url, replay, **kwargs)
        else:
            # No replies: still emit the question with placeholder reply fields.
            i = CarsForumItem()
            i['topic_id'] = [topic_id]
            i['brand'] = self.clean(brand)
            i['car_type'] = self.clean(car_type)
            i['type'] = self.clean(main_type)
            i['sub_type'] = self.clean(sub_type)

            i['title'] = self.clean(title)
            i['question'] = self.clean(question)
            i['question_img'] = self.clean(question_img)

            i['replay'] = [u"无"]
            i['replay_to'] = [u"无"]
            i['replay_img'] = [u'无']
            i['url'] = [response.url]
            i['datetime'] = [datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')]
            yield i

    def parse_replay(self, url, replay, **kwargs):
        """Build one item for a single reply block.

        :param url: URL of the detail page the reply was found on
        :param replay: Selector positioned on one reply ``hd_txt_box`` div
        :param kwargs: question-level fields extracted by :meth:`parse_page`
        :return: a populated CarsForumItem
        """
        i = CarsForumItem()
        replay_content = replay.xpath('text()').extract()
        # Like question_img: only record whether the reply has an image.
        if replay.xpath('img').extract():
            replay_img = [u'有']
        else:
            replay_img = [u'无']

        i['brand'] = self.clean(kwargs.get('brand'))
        i['car_type'] = self.clean(kwargs.get('car_type'))
        i['type'] = self.clean(kwargs.get('main_type'))
        i['sub_type'] = self.clean(kwargs.get('sub_type'))

        i['title'] = self.clean(kwargs.get('title'))
        i['question'] = self.clean(kwargs.get('question'))
        i['question_img'] = self.clean(kwargs.get('question_img'))

        i['replay'] = self.clean(replay_content)
        i['replay_to'] = [u'楼主的帖子']
        i['replay_img'] = replay_img
        i['url'] = [url]
        i['topic_id'] = [kwargs.get('topic_id')]
        i['datetime'] = [datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')]
        return i